# IMPORT LIBRARIES and PACKAGES
import os
import re
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import PIL #Python Imaging Library
import tensorflow as tf
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
#tfds.disable_progress_bar()
from glob import glob
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from IPython.display import display # display images
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
%matplotlib inline
IMAGE CLASSIFICATION - 3 CNN Models¶
AI company has a 2 GB dataset containing over 15,000 images of indoor locations. Originally from MIT, this dataset was built to work on indoor scene recognition. There are 67 categories of JPEG images. The number of images per category varies, but there are at least 100 images for each category.
Data was obtained from: https://www.kaggle.com/itsahmad/indoor-scenes-cvpr-2019
Assign Path to Previously Downloaded Images or Download Images¶
# Colab-specific setup: mount Google Drive so the dataset zip can be read.
import pathlib
from zipfile import ZipFile
from google.colab import drive
# NOTE: a recent Anaconda update broke local Jupyter, so Colab + GDrive is used instead.
drive.mount('/content/gdrive')
Mounted at /content/gdrive
# Note deleted this cell and repasted it when exporting to html to hide the output
!unzip gdrive/MyDrive/archive.zip
#set the parent folder that contains the subfolders that are the lables
img_dir = pathlib.Path('archive/indoorCVPR_09/Images/')
print(f'The indoorCVPR_09 photes are stored in local directory : {img_dir}')
The indoorCVPR_09 photes are stored in local directory : archive/indoorCVPR_09/Images
List the directories to double-check¶
# Walk the image tree, print an indented listing of every folder with its
# file count, then report the grand total of images found.
total_files = 0
root_str = str(img_dir)
for current, _subdirs, filenames in os.walk(root_str):
    depth = current.replace(root_str, '').count(os.sep)
    indent = ' ' * 4 * depth
    print(f'{indent}{os.path.basename(current)}/ ({len(filenames)} files)')
    total_files += len(filenames)
print(f'There are {total_files} images in this dataset')
Images/ (0 files)
deli/ (258 files)
florist/ (103 files)
artstudio/ (140 files)
concert_hall/ (103 files)
classroom/ (113 files)
poolinside/ (174 files)
trainstation/ (153 files)
corridor/ (346 files)
waitingroom/ (151 files)
videostore/ (110 files)
church_inside/ (180 files)
dentaloffice/ (131 files)
airport_inside/ (608 files)
elevator/ (101 files)
computerroom/ (114 files)
auditorium/ (176 files)
bakery/ (405 files)
museum/ (168 files)
inside_subway/ (457 files)
closet/ (135 files)
warehouse/ (506 files)
kitchen/ (734 files)
office/ (109 files)
kindergarden/ (127 files)
jewelleryshop/ (157 files)
toystore/ (347 files)
restaurant_kitchen/ (107 files)
hospitalroom/ (101 files)
movietheater/ (175 files)
bedroom/ (662 files)
bookstore/ (380 files)
bathroom/ (197 files)
fastfood_restaurant/ (116 files)
garage/ (103 files)
gameroom/ (127 files)
grocerystore/ (213 files)
operating_room/ (135 files)
subway/ (539 files)
buffet/ (111 files)
gym/ (231 files)
shoeshop/ (116 files)
greenhouse/ (101 files)
tv_studio/ (166 files)
winecellar/ (269 files)
dining_room/ (274 files)
mall/ (176 files)
stairscase/ (155 files)
restaurant/ (513 files)
bar/ (604 files)
clothingstore/ (106 files)
cloister/ (120 files)
studiomusic/ (108 files)
casino/ (515 files)
nursery/ (144 files)
meeting_room/ (233 files)
pantry/ (384 files)
prisoncell/ (103 files)
hairsalon/ (239 files)
children_room/ (112 files)
bowling/ (213 files)
inside_bus/ (102 files)
There are 14056 images in this dataset
Get the Indoor Image label using the Image directory¶
# Derive the class labels from the sub-directory names under img_dir.
IndoorImage_dir = [entry for entry in os.listdir(img_dir)
                   if os.path.isdir(os.path.join(img_dir, entry))]
print(f' The Indoor Image labels = {IndoorImage_dir}')
# Alphabetical order keeps the label -> index mapping stable and reproducible.
IndoorImage_dir.sort()
print(f'\n The SORTED Indoor Image labels = {IndoorImage_dir}')
print(f'\nThere are {len(IndoorImage_dir)} classes of Indoor Images.') # Confirmed 61 total target classes
The Indoor Image labels = ['deli', 'florist', 'artstudio', 'concert_hall', 'classroom', 'poolinside', 'trainstation', 'corridor', 'waitingroom', 'videostore', 'church_inside', 'dentaloffice', 'airport_inside', 'elevator', 'computerroom', 'auditorium', 'bakery', 'museum', 'inside_subway', 'closet', 'warehouse', 'kitchen', 'office', 'kindergarden', 'jewelleryshop', 'toystore', 'restaurant_kitchen', 'hospitalroom', 'movietheater', 'bedroom', 'bookstore', 'bathroom', 'fastfood_restaurant', 'garage', 'gameroom', 'grocerystore', 'operating_room', 'subway', 'buffet', 'gym', 'shoeshop', 'greenhouse', 'tv_studio', 'winecellar', 'dining_room', 'mall', 'stairscase', 'restaurant', 'bar', 'clothingstore', 'cloister', 'studiomusic', 'casino', 'nursery', 'meeting_room', 'pantry', 'prisoncell', 'hairsalon', 'children_room', 'bowling', 'inside_bus'] The SORTED Indoor Image labels = ['airport_inside', 'artstudio', 'auditorium', 'bakery', 'bar', 'bathroom', 'bedroom', 'bookstore', 'bowling', 'buffet', 'casino', 'children_room', 'church_inside', 'classroom', 'cloister', 'closet', 'clothingstore', 'computerroom', 'concert_hall', 'corridor', 'deli', 'dentaloffice', 'dining_room', 'elevator', 'fastfood_restaurant', 'florist', 'gameroom', 'garage', 'greenhouse', 'grocerystore', 'gym', 'hairsalon', 'hospitalroom', 'inside_bus', 'inside_subway', 'jewelleryshop', 'kindergarden', 'kitchen', 'mall', 'meeting_room', 'movietheater', 'museum', 'nursery', 'office', 'operating_room', 'pantry', 'poolinside', 'prisoncell', 'restaurant', 'restaurant_kitchen', 'shoeshop', 'stairscase', 'studiomusic', 'subway', 'toystore', 'trainstation', 'tv_studio', 'videostore', 'waitingroom', 'warehouse', 'winecellar'] There are 61 classes of Indoor Images.
Check the image quality and remove any bad images if necessary¶
If undecodable images are not removed, the code will crash with errors during training.
import glob

# Validate every image file by attempting to decode it with TensorFlow;
# collect the paths that fail so they can be removed before training
# (undecodable files would otherwise crash the data pipeline).
img_paths = glob.glob(os.path.join(img_dir, '*/*.*'))  # img_dir holds one sub-folder per label
bad_paths = []
for candidate in img_paths:
    try:
        raw_bytes = tf.io.read_file(candidate)
        _ = tf.io.decode_image(raw_bytes)
    except tf.errors.InvalidArgumentError as e:
        print(f"Found bad path {candidate}...{e}")
        bad_paths.append(candidate)

print("BAD PATHS:")
for bad_path in bad_paths:
    print(f"{bad_path}")
BAD PATHS:
Display Images¶
# Fix every RNG seed so results are reproducible across runs.
SEED = 777
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'  # deterministic cuDNN kernels (TF 2.0+)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Show the first image of every class folder along with its size and label.
for label in IndoorImage_dir:
    files_for_label = list(img_dir.glob(label + '/*'))
    img = PIL.Image.open(str(files_for_label[0]))
    print(f'(Image size = ({img.size[0]}, {img.size[1]}, {len(img.mode)}) ; IndoorsPlace = {label})')
    display(img)
(Image size = (500, 381, 3) ; IndoorsPlace = airport_inside)
(Image size = (300, 296, 3) ; IndoorsPlace = artstudio)
(Image size = (595, 425, 3) ; IndoorsPlace = auditorium)
(Image size = (350, 263, 3) ; IndoorsPlace = bakery)
(Image size = (768, 576, 3) ; IndoorsPlace = bar)
(Image size = (340, 512, 3) ; IndoorsPlace = bathroom)
(Image size = (256, 256, 3) ; IndoorsPlace = bedroom)
(Image size = (256, 256, 3) ; IndoorsPlace = bookstore)
(Image size = (488, 446, 3) ; IndoorsPlace = bowling)
(Image size = (256, 256, 3) ; IndoorsPlace = buffet)
(Image size = (400, 300, 3) ; IndoorsPlace = casino)
(Image size = (641, 481, 3) ; IndoorsPlace = children_room)
(Image size = (500, 375, 3) ; IndoorsPlace = church_inside)
(Image size = (256, 256, 3) ; IndoorsPlace = classroom)
(Image size = (256, 256, 3) ; IndoorsPlace = cloister)
(Image size = (985, 767, 3) ; IndoorsPlace = closet)
(Image size = (256, 256, 3) ; IndoorsPlace = clothingstore)
(Image size = (800, 600, 3) ; IndoorsPlace = computerroom)
(Image size = (256, 256, 3) ; IndoorsPlace = concert_hall)
(Image size = (256, 256, 3) ; IndoorsPlace = corridor)
(Image size = (375, 500, 3) ; IndoorsPlace = deli)
(Image size = (500, 369, 3) ; IndoorsPlace = dentaloffice)
(Image size = (256, 256, 3) ; IndoorsPlace = dining_room)
(Image size = (471, 643, 3) ; IndoorsPlace = elevator)
(Image size = (400, 299, 3) ; IndoorsPlace = fastfood_restaurant)
(Image size = (319, 250, 3) ; IndoorsPlace = florist)
(Image size = (488, 346, 3) ; IndoorsPlace = gameroom)
(Image size = (432, 324, 3) ; IndoorsPlace = garage)
(Image size = (256, 256, 3) ; IndoorsPlace = greenhouse)
(Image size = (256, 256, 3) ; IndoorsPlace = grocerystore)
(Image size = (500, 332, 3) ; IndoorsPlace = gym)
(Image size = (256, 256, 3) ; IndoorsPlace = hairsalon)
(Image size = (642, 519, 3) ; IndoorsPlace = hospitalroom)
(Image size = (694, 390, 3) ; IndoorsPlace = inside_bus)
(Image size = (500, 333, 3) ; IndoorsPlace = inside_subway)
(Image size = (600, 450, 3) ; IndoorsPlace = jewelleryshop)
(Image size = (800, 735, 3) ; IndoorsPlace = kindergarden)
(Image size = (256, 256, 3) ; IndoorsPlace = kitchen)
(Image size = (416, 437, 3) ; IndoorsPlace = mall)
(Image size = (256, 256, 3) ; IndoorsPlace = meeting_room)
(Image size = (435, 310, 3) ; IndoorsPlace = movietheater)
(Image size = (500, 347, 3) ; IndoorsPlace = museum)
(Image size = (256, 256, 3) ; IndoorsPlace = nursery)
(Image size = (256, 256, 3) ; IndoorsPlace = office)
(Image size = (640, 480, 3) ; IndoorsPlace = operating_room)
(Image size = (500, 375, 3) ; IndoorsPlace = pantry)
(Image size = (400, 300, 3) ; IndoorsPlace = poolinside)
(Image size = (256, 256, 3) ; IndoorsPlace = prisoncell)
(Image size = (410, 307, 3) ; IndoorsPlace = restaurant)
(Image size = (512, 384, 3) ; IndoorsPlace = restaurant_kitchen)
(Image size = (338, 450, 3) ; IndoorsPlace = shoeshop)
(Image size = (256, 256, 3) ; IndoorsPlace = stairscase)
(Image size = (472, 354, 3) ; IndoorsPlace = studiomusic)
(Image size = (404, 500, 3) ; IndoorsPlace = subway)
(Image size = (500, 375, 3) ; IndoorsPlace = toystore)
(Image size = (530, 398, 3) ; IndoorsPlace = trainstation)
(Image size = (448, 336, 3) ; IndoorsPlace = tv_studio)
(Image size = (500, 342, 3) ; IndoorsPlace = videostore)
(Image size = (400, 299, 3) ; IndoorsPlace = waitingroom)
(Image size = (356, 400, 3) ; IndoorsPlace = warehouse)
(Image size = (288, 287, 3) ; IndoorsPlace = winecellar)
Load & Pre-Process Images¶
# RESIZE images to a consistent size so TensorFlow can FIT the model, using
# tf.keras.preprocessing.image_dataset_from_directory (instead of a manual loop),
# which accepts 14 configurable parameters.
# help(tf.keras.preprocessing.image_dataset_from_directory)
Setup parameters used to load and process the images which are currently stored as files in a directory.¶
# Parameters for tf.keras.preprocessing.image_dataset_from_directory.
batch_size = 32     # images per batch
image_height = 256  # resize target height (pixels)
image_width = 256   # resize target width (pixels)
split = 0.60        # fraction carved off for sampling (RAM constraints)
Train and Validation data creation¶
# First split off 60% of all the training data into temp_val_data
# Carve off 60% of all the data; this "validation" subset is the portion we
# actually work with downstream (sampling keeps memory usage manageable).
temp_val_data = tf.keras.preprocessing.image_dataset_from_directory(
    img_dir,
    labels='inferred',   # labels come from the directory structure
    label_mode='int',    # integer labels -> sparse_categorical_crossentropy
    validation_split=split,
    subset="validation",
    seed=1001,           # fixed seed so the split is reproducible
    image_size=(image_height, image_width),
    batch_size=batch_size,
)
Found 14056 files belonging to 61 classes. Using 8433 files for validation.
# Of the 60% split, further split temp_val_data batches into 1:1 ratio of training:validation.
# Split temp_val_data 50/50 into train vs. the remainder, then split that
# remainder 50/50 into validation vs. test. Net result: roughly 30% / 15% /
# 15% of the original data for train / val / test respectively.
val_batches = tf.data.experimental.cardinality(temp_val_data)  # 8433/32 ~= 264 batches
half = val_batches // 2
train_data = temp_val_data.take(half)   # first half -> training
split_data = temp_val_data.skip(half)   # second half -> to be split again

val_batches = tf.data.experimental.cardinality(split_data)
half = val_batches // 2
val_data = split_data.take(half)
test_data = split_data.skip(half)

print(f'Batches in training = {len(train_data)}.')
print(f'Batches in split = {len(split_data)}.')
print(f'Batches in validation = {len(val_data)}.')
print(f'Batches in test = {len(test_data)}.')
Batches in training = 132. Batches in split = 132. Batches in validation = 66. Batches in test = 66.
Visualize the Images¶
# Peek at one training batch: dump the pixel values of the second image and
# the min/max of the first image (confirms raw pixels are in 0..255).
for img, lab in train_data.take(1):
    print(img[1].numpy().astype("uint16"))
    first = img[0].numpy().astype("uint16")
    print(f'minimum = {np.amin(first)}, maximum = {np.amax(first)}')
    break
[[[225 211 200] [227 213 202] [227 213 202] ... [164 167 160] [225 225 218] [242 241 236]] [[200 186 175] [201 187 176] [201 187 176] ... [166 169 162] [221 221 213] [238 237 233]] [[201 187 174] [201 187 174] [200 186 173] ... [173 176 169] [226 225 218] [243 242 238]] ... [[ 42 43 47] [ 35 36 40] [ 30 31 35] ... [ 72 75 58] [141 142 129] [243 242 236]] [[ 15 16 20] [ 8 9 13] [ 3 4 8] ... [ 46 48 34] [138 138 126] [242 241 236]] [[132 133 135] [129 130 133] [125 126 129] ... [167 168 160] [231 231 224] [246 245 241]]] minimum = 0, maximum = 252
Let's display a set of the now uniform images.¶
# Display a 4-wide x 4-tall grid of images from a single training batch,
# titled with their class names.
plt.figure(figsize=(12, 12))
for img, lab in train_data.take(1):
    for idx in range(16):
        plt.subplot(4, 4, idx + 1)
        plt.imshow(img[idx].numpy().astype("uint16"))
        plt.title(IndoorImage_dir[lab[idx]])  # map label index -> class name
        plt.axis("off")
Check the shape and size of the image data and the labels.¶
# Inspect the tensor shapes of one (images, labels) batch produced by
# the tf.data pipeline; stop after the first batch.
for image_batch, labels_batch in train_data:
    print(f'image_batch.shape = {image_batch.shape} \nlabels_batch.shape = {labels_batch.shape } ')
    break
image_batch.shape = (32, 256, 256, 3) labels_batch.shape = (32,)
Configure the Dataset for Better performance¶
# Cache, shuffle (train only) and prefetch so the input pipeline overlaps
# with training; AUTOTUNE lets TF pick buffer sizes dynamically at runtime.
AUTOTUNE = tf.data.AUTOTUNE
train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)
val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)
Normalize the Data¶
# 8-bit pixels take 2^8 = 256 integer values (0..255). Rescale them to
# floats in [0, 1]. input_shape is declared here so that model.summary()
# can report output shapes correctly.
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(
    1.0 / 255.0, input_shape=(image_height, image_width, 3))
Build a baseline CNN model on the training dataset.¶
# Baseline CNN: six Conv2D/MaxPooling2D stages that double the filter count
# (offsetting the pooling) while halving the spatial size, followed by a
# dense head over the flattened features.
conv_stack = []
for n_filters in (16, 32, 64, 128, 256, 512):
    # filter size 3 is shorthand for a (3, 3) kernel
    conv_stack.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    conv_stack.append(layers.MaxPooling2D())

model1 = tf.keras.Sequential(
    [normalization_layer]
    + conv_stack
    + [
        # Rearrange the 4D (N, H, W, C) output into 2D without losing info.
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        # Raw logits (activation=None): SparseCategoricalCrossentropy with
        # from_logits=True applies softmax internally. 61 target classes.
        layers.Dense(61, activation=None),
    ]
)
# Learning-rate notes: 0.0001 and 0.005 were both tried; the model overfit
# either way (likely from the lack of Dropout), so we keep Adam's default
# learning_rate = 0.001, which produced similar results.
opt = tf.keras.optimizers.Adam()
# FIX: `opt` was created but never used (compile received the string 'adam');
# pass the instance so the configured optimizer is actually what trains.
model1.compile(optimizer=opt,
               # Labels are encoded as int, so use SparseCategoricalCrossentropy;
               # from_logits=True applies softmax to the raw logits internally.
               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
               # Reasonably balanced image classification -> accuracy is a fair metric.
               metrics=['accuracy'])
model1.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
rescaling (Rescaling) (None, 256, 256, 3) 0
conv2d (Conv2D) (None, 256, 256, 16) 448
max_pooling2d (MaxPooling2 (None, 128, 128, 16) 0
D)
conv2d_1 (Conv2D) (None, 128, 128, 32) 4640
max_pooling2d_1 (MaxPoolin (None, 64, 64, 32) 0
g2D)
conv2d_2 (Conv2D) (None, 64, 64, 64) 18496
max_pooling2d_2 (MaxPoolin (None, 32, 32, 64) 0
g2D)
conv2d_3 (Conv2D) (None, 32, 32, 128) 73856
max_pooling2d_3 (MaxPoolin (None, 16, 16, 128) 0
g2D)
conv2d_4 (Conv2D) (None, 16, 16, 256) 295168
max_pooling2d_4 (MaxPoolin (None, 8, 8, 256) 0
g2D)
conv2d_5 (Conv2D) (None, 8, 8, 512) 1180160
max_pooling2d_5 (MaxPoolin (None, 4, 4, 512) 0
g2D)
flatten (Flatten) (None, 8192) 0
dense (Dense) (None, 512) 4194816
dense_1 (Dense) (None, 61) 31293
=================================================================
Total params: 5798877 (22.12 MB)
Trainable params: 5798877 (22.12 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
%%time
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience= 7)
epochs= 100
history1 = model1.fit(
train_data,
validation_data=val_data,
epochs=epochs,
callbacks=[callback], verbose = 1
)
Epoch 1/100 132/132 [==============================] - 75s 384ms/step - loss: 3.9340 - accuracy: 0.0407 - val_loss: 3.8736 - val_accuracy: 0.0677 Epoch 2/100 132/132 [==============================] - 10s 76ms/step - loss: 3.8180 - accuracy: 0.0675 - val_loss: 3.7518 - val_accuracy: 0.0852 Epoch 3/100 132/132 [==============================] - 10s 77ms/step - loss: 3.6982 - accuracy: 0.0810 - val_loss: 3.6755 - val_accuracy: 0.0871 Epoch 4/100 132/132 [==============================] - 10s 76ms/step - loss: 3.5408 - accuracy: 0.1089 - val_loss: 3.5144 - val_accuracy: 0.1193 Epoch 5/100 132/132 [==============================] - 10s 76ms/step - loss: 3.3311 - accuracy: 0.1522 - val_loss: 3.3997 - val_accuracy: 0.1368 Epoch 6/100 132/132 [==============================] - 10s 77ms/step - loss: 3.1164 - accuracy: 0.1884 - val_loss: 3.2330 - val_accuracy: 0.1738 Epoch 7/100 132/132 [==============================] - 10s 76ms/step - loss: 2.8645 - accuracy: 0.2436 - val_loss: 3.2139 - val_accuracy: 0.1984 Epoch 8/100 132/132 [==============================] - 10s 76ms/step - loss: 2.5066 - accuracy: 0.3161 - val_loss: 3.2010 - val_accuracy: 0.2131 Epoch 9/100 132/132 [==============================] - 10s 76ms/step - loss: 1.9992 - accuracy: 0.4503 - val_loss: 3.5444 - val_accuracy: 0.2050 Epoch 10/100 132/132 [==============================] - 10s 76ms/step - loss: 1.4026 - accuracy: 0.5907 - val_loss: 3.8709 - val_accuracy: 0.2282 Epoch 11/100 132/132 [==============================] - 10s 78ms/step - loss: 0.7388 - accuracy: 0.7905 - val_loss: 5.4504 - val_accuracy: 0.2244 Epoch 12/100 132/132 [==============================] - 10s 76ms/step - loss: 0.4019 - accuracy: 0.8757 - val_loss: 6.8409 - val_accuracy: 0.2268 Epoch 13/100 132/132 [==============================] - 10s 76ms/step - loss: 0.2099 - accuracy: 0.9368 - val_loss: 7.3296 - val_accuracy: 0.2268 Epoch 14/100 132/132 [==============================] - 10s 76ms/step - loss: 0.1184 - accuracy: 0.9635 - 
val_loss: 7.6141 - val_accuracy: 0.2325 Epoch 15/100 132/132 [==============================] - 10s 78ms/step - loss: 0.0883 - accuracy: 0.9751 - val_loss: 8.2590 - val_accuracy: 0.2221 Epoch 16/100 132/132 [==============================] - 10s 77ms/step - loss: 0.0811 - accuracy: 0.9756 - val_loss: 9.2563 - val_accuracy: 0.2391 Epoch 17/100 132/132 [==============================] - 10s 76ms/step - loss: 0.0687 - accuracy: 0.9818 - val_loss: 9.3105 - val_accuracy: 0.2240 Epoch 18/100 132/132 [==============================] - 10s 77ms/step - loss: 0.0874 - accuracy: 0.9787 - val_loss: 8.8118 - val_accuracy: 0.2358 Epoch 19/100 132/132 [==============================] - 10s 77ms/step - loss: 0.1467 - accuracy: 0.9574 - val_loss: 7.9560 - val_accuracy: 0.2197 Epoch 20/100 132/132 [==============================] - 10s 78ms/step - loss: 0.0362 - accuracy: 0.9891 - val_loss: 9.5169 - val_accuracy: 0.2287 Epoch 21/100 132/132 [==============================] - 10s 77ms/step - loss: 0.0788 - accuracy: 0.9787 - val_loss: 9.5464 - val_accuracy: 0.2088 Epoch 22/100 132/132 [==============================] - 10s 79ms/step - loss: 0.0932 - accuracy: 0.9742 - val_loss: 9.0280 - val_accuracy: 0.2339 Epoch 23/100 132/132 [==============================] - 10s 78ms/step - loss: 0.0537 - accuracy: 0.9846 - val_loss: 9.4368 - val_accuracy: 0.2353 CPU times: user 4min 4s, sys: 9.19 s, total: 4min 13s Wall time: 5min 8s
Evaluate the Baseline CNN model on validation dataset.¶
# Turn the Keras History into a DataFrame (1-based epoch column) and
# plot training vs. validation loss on the same axes.
train_history1 = pd.DataFrame(history1.history)
train_history1['epoch'] = [e + 1 for e in history1.epoch]
sns.lineplot(x='epoch', y='loss', data=train_history1)      # training loss
sns.lineplot(x='epoch', y='val_loss', data=train_history1)  # validation loss
plt.legend(labels=['train_loss', 'val_loss'])
<matplotlib.legend.Legend at 0x7b8c002345b0>
# Plot training accuracy
sns.lineplot(x='epoch', y ='accuracy', data =train_history1)
# Plot validation accuracy (the original comment wrongly said "loss")
sns.lineplot(x='epoch', y ='val_accuracy', data =train_history1)
# Add legends
plt.legend(labels=['train_accuracy', 'val_accuracy'])
<matplotlib.legend.Legend at 0x7b8ba06d9870>
Evaluate the Baseline CNN model on test dataset.¶
# Predict logits for the test set; argmax over the class axis gives the
# predicted class ids.
predict = model1.predict(test_data)
y_pred = np.argmax(predict, axis=1)
# Recover ground-truth labels batch-by-batch. Safe because test_data is not
# shuffled, so its ordering matches the order predict() consumed.
y_test = np.concatenate([labels for _, labels in test_data], axis=0)
# Overall accuracy = fraction of matching predictions.
total_accuracy = np.mean(np.asarray(y_test) == np.asarray(y_pred))
print(f'The overall accuracy of model 1 on the test dataset is {total_accuracy:.4f}.')
66/66 [==============================] - 30s 142ms/step The overall accuracy of model 1 on the test dataset is 0.1812.
# Per-class accuracy = diagonal of the confusion matrix over its row sums.
cm = confusion_matrix(y_test, y_pred)
# Suppress divide-by-zero / NaN warnings for any class with no test samples.
with np.errstate(divide='ignore', invalid='ignore'):
    accuracies = cm.diagonal() / cm.sum(axis=1)

# NOTE: the name mapping below is only correct if every class appears in the
# confusion matrix; a missing class would shift all subsequent labels.
print("Accuracies for each class:")
for class_idx, acc in enumerate(accuracies):
    print(f"{IndoorImage_dir[class_idx]}: {acc:.4f}")
print(f'Total classes: {len(accuracies)}') # Verify that 61 classes were seen
Accuracies for each class: airport_inside: 0.2174 artstudio: 0.0000 auditorium: 0.0769 bakery: 0.1594 bar: 0.1333 bathroom: 0.2069 bedroom: 0.3711 bookstore: 0.2182 bowling: 0.1429 buffet: 0.0588 casino: 0.1899 children_room: 0.0000 church_inside: 0.0500 classroom: 0.0476 cloister: 0.2500 closet: 0.2083 clothingstore: 0.0000 computerroom: 0.0556 concert_hall: 0.1053 corridor: 0.3191 deli: 0.0811 dentaloffice: 0.0588 dining_room: 0.0476 elevator: 0.2667 fastfood_restaurant: 0.0000 florist: 0.0667 gameroom: 0.0526 garage: 0.0000 greenhouse: 0.1176 grocerystore: 0.2609 gym: 0.0333 hairsalon: 0.1951 hospitalroom: 0.0000 inside_bus: 0.2000 inside_subway: 0.2208 jewelleryshop: 0.0000 kindergarden: 0.0556 kitchen: 0.3362 mall: 0.1379 meeting_room: 0.1471 movietheater: 0.5862 museum: 0.0000 nursery: 0.1429 office: 0.1000 operating_room: 0.1538 pantry: 0.3833 poolinside: 0.2821 prisoncell: 0.0625 restaurant: 0.1803 restaurant_kitchen: 0.0556 shoeshop: 0.0526 stairscase: 0.0000 studiomusic: 0.0000 subway: 0.3494 toystore: 0.0926 trainstation: 0.1290 tv_studio: 0.0312 videostore: 0.0000 waitingroom: 0.0588 warehouse: 0.2564 winecellar: 0.0294 Total classes: 61
# Per-class precision / recall / F1 (classes shown as integer label ids).
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.16 0.22 0.19 92
1 0.00 0.00 0.00 17
2 0.14 0.08 0.10 26
3 0.18 0.16 0.17 69
4 0.18 0.13 0.15 90
5 0.19 0.21 0.20 29
6 0.18 0.37 0.25 97
7 0.08 0.22 0.12 55
8 0.24 0.14 0.18 35
9 0.33 0.06 0.10 17
10 0.41 0.19 0.26 79
11 0.00 0.00 0.00 9
12 1.00 0.05 0.10 20
13 0.06 0.05 0.05 21
14 0.24 0.25 0.24 16
15 0.42 0.21 0.28 24
16 0.00 0.00 0.00 15
17 0.07 0.06 0.06 18
18 0.22 0.11 0.14 19
19 0.26 0.32 0.29 47
20 0.07 0.08 0.07 37
21 0.09 0.06 0.07 17
22 0.12 0.05 0.07 42
23 0.27 0.27 0.27 15
24 0.00 0.00 0.00 14
25 0.11 0.07 0.08 15
26 0.03 0.05 0.04 19
27 0.00 0.00 0.00 9
28 0.17 0.12 0.14 17
29 0.10 0.26 0.15 23
30 0.07 0.03 0.05 30
31 0.16 0.20 0.18 41
32 0.00 0.00 0.00 12
33 0.13 0.20 0.16 10
34 0.41 0.22 0.29 77
35 0.00 0.00 0.00 24
36 0.17 0.06 0.08 18
37 0.23 0.34 0.27 116
38 0.14 0.14 0.14 29
39 0.38 0.15 0.21 34
40 0.38 0.59 0.46 29
41 0.00 0.00 0.00 27
42 0.15 0.14 0.15 21
43 0.05 0.10 0.07 10
44 0.31 0.15 0.21 26
45 0.47 0.38 0.42 60
46 0.31 0.28 0.29 39
47 0.11 0.06 0.08 16
48 0.12 0.18 0.15 61
49 0.17 0.06 0.08 18
50 0.09 0.05 0.07 19
51 0.00 0.00 0.00 27
52 0.00 0.00 0.00 11
53 0.29 0.35 0.32 83
54 0.13 0.09 0.11 54
55 0.29 0.13 0.18 31
56 0.08 0.03 0.04 32
57 0.00 0.00 0.00 10
58 0.06 0.06 0.06 17
59 0.24 0.26 0.25 78
60 0.02 0.03 0.03 34
accuracy 0.18 2097
macro avg 0.17 0.13 0.13 2097
weighted avg 0.20 0.18 0.18 2097
It is clear from the validation graphs that the model is severely overfit. Both loss and accuracy curves diverge very significantly as training accuracy reached 98.5% whereas validation accuracy only reached 23.5%. I tested changing the learning rate in the Adam optimizer from 0.001 to (0.0001 and 0.005), but both yielded similar results (model still overfit). This is most likely due to the lack of Dropout in the model architecture, which would directly address the overfitting issue.
Evaluating the model on the test data yielded an overall accuracy of 18%, with a weighted F1-score of 0.18 as well. This model is not good and could definitely be improved, both to address the overfitting and to raise the accuracy.
Build a second CNN model with data augmentation and dropout.¶
# Augmentation pipeline to be embedded in the model: random flips,
# rotations, translations and zooms (active only during training).
data_aug = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomFlip(
        "horizontal_and_vertical",
        input_shape=(image_height, image_width, 3)),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
    tf.keras.layers.experimental.preprocessing.RandomTranslation(
        height_factor=0.2, width_factor=0.2),
    tf.keras.layers.experimental.preprocessing.RandomZoom(height_factor=(0.2, 0.2)),
])
# Model 2 = the baseline CNN plus data augmentation up front and two
# Dropout layers, both aimed at the overfitting observed in model 1.
stack = [data_aug, normalization_layer]
for n_filters in (16, 32, 64, 128, 256, 512):
    # filter size 3 is shorthand for a (3, 3) kernel; filters double each
    # stage to offset the MaxPooling2D downsampling.
    stack.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    stack.append(layers.MaxPooling2D())
stack += [
    layers.Dropout(0.2),   # drop 20% of conv feature maps
    # Rearrange the 4D (N, H, W, C) output into 2D without losing info.
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),   # drop 20% of dense activations
    # Raw logits for SparseCategoricalCrossentropy(from_logits=True);
    # 61 target label classes.
    layers.Dense(61, activation=None),
]
model2 = tf.keras.Sequential(stack)
model2.compile(
    optimizer='adam',  # default Adam (learning_rate=0.001)
    # Integer-encoded labels + raw logits -> sparse CE with from_logits=True,
    # which applies softmax internally to produce probabilities.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # Reasonably balanced classification -> plain accuracy is a fair metric.
    metrics=['accuracy'])
model2.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential_1 (Sequential) (None, 256, 256, 3) 0
rescaling (Rescaling) (None, 256, 256, 3) 0
conv2d_6 (Conv2D) (None, 256, 256, 16) 448
max_pooling2d_6 (MaxPoolin (None, 128, 128, 16) 0
g2D)
conv2d_7 (Conv2D) (None, 128, 128, 32) 4640
max_pooling2d_7 (MaxPoolin (None, 64, 64, 32) 0
g2D)
conv2d_8 (Conv2D) (None, 64, 64, 64) 18496
max_pooling2d_8 (MaxPoolin (None, 32, 32, 64) 0
g2D)
conv2d_9 (Conv2D) (None, 32, 32, 128) 73856
max_pooling2d_9 (MaxPoolin (None, 16, 16, 128) 0
g2D)
conv2d_10 (Conv2D) (None, 16, 16, 256) 295168
max_pooling2d_10 (MaxPooli (None, 8, 8, 256) 0
ng2D)
conv2d_11 (Conv2D) (None, 8, 8, 512) 1180160
max_pooling2d_11 (MaxPooli (None, 4, 4, 512) 0
ng2D)
dropout (Dropout) (None, 4, 4, 512) 0
flatten_1 (Flatten) (None, 8192) 0
dense_2 (Dense) (None, 512) 4194816
dropout_1 (Dropout) (None, 512) 0
dense_3 (Dense) (None, 61) 31293
=================================================================
Total params: 5798877 (22.12 MB)
Trainable params: 5798877 (22.12 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
%%time
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience= 7)
epochs= 100
history2 = model2.fit(
train_data,
validation_data=val_data,
epochs=epochs,
callbacks=[callback], verbose = 1
)
Epoch 1/100 132/132 [==============================] - 20s 122ms/step - loss: 3.9681 - accuracy: 0.0426 - val_loss: 3.9105 - val_accuracy: 0.0450 Epoch 2/100 132/132 [==============================] - 16s 121ms/step - loss: 3.9003 - accuracy: 0.0466 - val_loss: 3.8583 - val_accuracy: 0.0710 Epoch 3/100 132/132 [==============================] - 16s 120ms/step - loss: 3.8655 - accuracy: 0.0504 - val_loss: 3.8259 - val_accuracy: 0.0682 Epoch 4/100 132/132 [==============================] - 16s 122ms/step - loss: 3.8154 - accuracy: 0.0630 - val_loss: 3.7779 - val_accuracy: 0.0800 Epoch 5/100 132/132 [==============================] - 16s 123ms/step - loss: 3.7975 - accuracy: 0.0630 - val_loss: 3.7423 - val_accuracy: 0.0772 Epoch 6/100 132/132 [==============================] - 16s 120ms/step - loss: 3.7719 - accuracy: 0.0627 - val_loss: 3.7515 - val_accuracy: 0.0829 Epoch 7/100 132/132 [==============================] - 16s 121ms/step - loss: 3.7317 - accuracy: 0.0727 - val_loss: 3.6548 - val_accuracy: 0.1080 Epoch 8/100 132/132 [==============================] - 16s 120ms/step - loss: 3.6844 - accuracy: 0.0881 - val_loss: 3.6501 - val_accuracy: 0.1004 Epoch 9/100 132/132 [==============================] - 16s 120ms/step - loss: 3.6492 - accuracy: 0.0852 - val_loss: 3.5846 - val_accuracy: 0.1061 Epoch 10/100 132/132 [==============================] - 16s 120ms/step - loss: 3.6041 - accuracy: 0.0999 - val_loss: 3.6024 - val_accuracy: 0.1108 Epoch 11/100 132/132 [==============================] - 16s 120ms/step - loss: 3.5595 - accuracy: 0.0966 - val_loss: 3.5453 - val_accuracy: 0.1108 Epoch 12/100 132/132 [==============================] - 16s 120ms/step - loss: 3.5527 - accuracy: 0.1054 - val_loss: 3.4668 - val_accuracy: 0.1179 Epoch 13/100 132/132 [==============================] - 16s 122ms/step - loss: 3.5285 - accuracy: 0.1061 - val_loss: 3.4827 - val_accuracy: 0.1174 Epoch 14/100 132/132 [==============================] - 16s 120ms/step - loss: 3.4904 - accuracy: 
0.1153 - val_loss: 3.4224 - val_accuracy: 0.1207 Epoch 15/100 132/132 [==============================] - 16s 120ms/step - loss: 3.4782 - accuracy: 0.1179 - val_loss: 3.4733 - val_accuracy: 0.1165 Epoch 16/100 132/132 [==============================] - 16s 120ms/step - loss: 3.4672 - accuracy: 0.1061 - val_loss: 3.4189 - val_accuracy: 0.1293 Epoch 17/100 132/132 [==============================] - 16s 120ms/step - loss: 3.4412 - accuracy: 0.1219 - val_loss: 3.4570 - val_accuracy: 0.1297 Epoch 18/100 132/132 [==============================] - 16s 122ms/step - loss: 3.4268 - accuracy: 0.1319 - val_loss: 3.5051 - val_accuracy: 0.1226 Epoch 19/100 132/132 [==============================] - 16s 121ms/step - loss: 3.4164 - accuracy: 0.1307 - val_loss: 3.3404 - val_accuracy: 0.1397 Epoch 20/100 132/132 [==============================] - 16s 123ms/step - loss: 3.4106 - accuracy: 0.1276 - val_loss: 3.4234 - val_accuracy: 0.1174 Epoch 21/100 132/132 [==============================] - 16s 123ms/step - loss: 3.3958 - accuracy: 0.1309 - val_loss: 3.3151 - val_accuracy: 0.1520 Epoch 22/100 132/132 [==============================] - 17s 128ms/step - loss: 3.3706 - accuracy: 0.1302 - val_loss: 3.3591 - val_accuracy: 0.1501 Epoch 23/100 132/132 [==============================] - 17s 129ms/step - loss: 3.3300 - accuracy: 0.1347 - val_loss: 3.2855 - val_accuracy: 0.1562 Epoch 24/100 132/132 [==============================] - 16s 124ms/step - loss: 3.3348 - accuracy: 0.1461 - val_loss: 3.3118 - val_accuracy: 0.1652 Epoch 25/100 132/132 [==============================] - 17s 125ms/step - loss: 3.3020 - accuracy: 0.1470 - val_loss: 3.2757 - val_accuracy: 0.1605 Epoch 26/100 132/132 [==============================] - 17s 126ms/step - loss: 3.2808 - accuracy: 0.1487 - val_loss: 3.2221 - val_accuracy: 0.1780 Epoch 27/100 132/132 [==============================] - 17s 125ms/step - loss: 3.2500 - accuracy: 0.1615 - val_loss: 3.2417 - val_accuracy: 0.1600 Epoch 28/100 132/132 
[==============================] - 16s 124ms/step - loss: 3.2443 - accuracy: 0.1634 - val_loss: 3.2050 - val_accuracy: 0.1795 Epoch 29/100 132/132 [==============================] - 16s 124ms/step - loss: 3.2256 - accuracy: 0.1697 - val_loss: 3.2450 - val_accuracy: 0.1705 Epoch 30/100 132/132 [==============================] - 17s 126ms/step - loss: 3.1931 - accuracy: 0.1802 - val_loss: 3.1533 - val_accuracy: 0.1813 Epoch 31/100 132/132 [==============================] - 16s 125ms/step - loss: 3.1747 - accuracy: 0.1768 - val_loss: 3.1421 - val_accuracy: 0.2022 Epoch 32/100 132/132 [==============================] - 17s 126ms/step - loss: 3.1677 - accuracy: 0.1747 - val_loss: 3.2166 - val_accuracy: 0.1837 Epoch 33/100 132/132 [==============================] - 17s 127ms/step - loss: 3.1617 - accuracy: 0.1858 - val_loss: 3.2131 - val_accuracy: 0.1837 Epoch 34/100 132/132 [==============================] - 16s 122ms/step - loss: 3.1495 - accuracy: 0.1802 - val_loss: 3.2644 - val_accuracy: 0.1795 Epoch 35/100 132/132 [==============================] - 16s 122ms/step - loss: 3.1062 - accuracy: 0.1868 - val_loss: 3.1434 - val_accuracy: 0.1960 Epoch 36/100 132/132 [==============================] - 16s 121ms/step - loss: 3.0966 - accuracy: 0.1880 - val_loss: 3.1334 - val_accuracy: 0.2079 Epoch 37/100 132/132 [==============================] - 17s 131ms/step - loss: 3.0782 - accuracy: 0.1882 - val_loss: 3.0838 - val_accuracy: 0.2050 Epoch 38/100 132/132 [==============================] - 16s 121ms/step - loss: 3.0385 - accuracy: 0.2022 - val_loss: 3.1934 - val_accuracy: 0.1922 Epoch 39/100 132/132 [==============================] - 16s 121ms/step - loss: 3.0489 - accuracy: 0.1991 - val_loss: 3.0244 - val_accuracy: 0.2145 Epoch 40/100 132/132 [==============================] - 16s 121ms/step - loss: 3.0082 - accuracy: 0.2081 - val_loss: 3.0723 - val_accuracy: 0.2069 Epoch 41/100 132/132 [==============================] - 16s 121ms/step - loss: 3.0163 - accuracy: 0.2010 - 
val_loss: 3.0492 - val_accuracy: 0.2135 Epoch 42/100 132/132 [==============================] - 16s 121ms/step - loss: 2.9814 - accuracy: 0.2154 - val_loss: 3.1004 - val_accuracy: 0.2083 Epoch 43/100 132/132 [==============================] - 16s 121ms/step - loss: 2.9651 - accuracy: 0.2086 - val_loss: 3.0874 - val_accuracy: 0.2173 Epoch 44/100 132/132 [==============================] - 16s 121ms/step - loss: 2.9492 - accuracy: 0.2242 - val_loss: 3.0343 - val_accuracy: 0.2140 Epoch 45/100 132/132 [==============================] - 16s 123ms/step - loss: 2.9465 - accuracy: 0.2190 - val_loss: 3.1540 - val_accuracy: 0.2027 Epoch 46/100 132/132 [==============================] - 16s 122ms/step - loss: 2.9303 - accuracy: 0.2214 - val_loss: 3.0767 - val_accuracy: 0.2240 Epoch 47/100 132/132 [==============================] - 16s 121ms/step - loss: 2.9400 - accuracy: 0.2202 - val_loss: 3.0599 - val_accuracy: 0.2121 Epoch 48/100 132/132 [==============================] - 16s 121ms/step - loss: 2.8924 - accuracy: 0.2254 - val_loss: 2.9784 - val_accuracy: 0.2363 Epoch 49/100 132/132 [==============================] - 16s 121ms/step - loss: 2.8556 - accuracy: 0.2325 - val_loss: 2.9719 - val_accuracy: 0.2348 Epoch 50/100 132/132 [==============================] - 16s 121ms/step - loss: 2.8337 - accuracy: 0.2443 - val_loss: 3.0424 - val_accuracy: 0.2221 Epoch 51/100 132/132 [==============================] - 16s 123ms/step - loss: 2.8271 - accuracy: 0.2334 - val_loss: 2.9784 - val_accuracy: 0.2377 Epoch 52/100 132/132 [==============================] - 18s 133ms/step - loss: 2.8358 - accuracy: 0.2424 - val_loss: 3.0008 - val_accuracy: 0.2268 Epoch 53/100 132/132 [==============================] - 17s 128ms/step - loss: 2.8104 - accuracy: 0.2398 - val_loss: 3.0543 - val_accuracy: 0.2206 Epoch 54/100 132/132 [==============================] - 16s 122ms/step - loss: 2.7993 - accuracy: 0.2412 - val_loss: 2.9794 - val_accuracy: 0.2348 Epoch 55/100 132/132 
[==============================] - 16s 120ms/step - loss: 2.7928 - accuracy: 0.2460 - val_loss: 3.0074 - val_accuracy: 0.2244 Epoch 56/100 132/132 [==============================] - 16s 123ms/step - loss: 2.7835 - accuracy: 0.2521 - val_loss: 3.1115 - val_accuracy: 0.2131 Epoch 57/100 132/132 [==============================] - 16s 121ms/step - loss: 2.7482 - accuracy: 0.2528 - val_loss: 3.0614 - val_accuracy: 0.2273 Epoch 58/100 132/132 [==============================] - 16s 121ms/step - loss: 2.7421 - accuracy: 0.2562 - val_loss: 3.0633 - val_accuracy: 0.2296 CPU times: user 12min 49s, sys: 15.2 s, total: 13min 4s Wall time: 17min 13s
Evaluate the second CNN model on the validation dataset.¶
# Convert the Keras History object into a DataFrame and add a 1-based epoch column.
train_history2 = pd.DataFrame(history2.history)
train_history2['epoch'] = [e + 1 for e in history2.epoch]
# Overlay the training and validation loss curves on the same axes.
for loss_col in ('loss', 'val_loss'):
    sns.lineplot(x='epoch', y=loss_col, data=train_history2)
plt.legend(labels=['train_loss', 'val_loss'])
<matplotlib.legend.Legend at 0x7b8ba04c1360>
# Overlay training vs. validation accuracy per epoch to check for overfitting.
for acc_col in ('accuracy', 'val_accuracy'):
    sns.lineplot(x='epoch', y=acc_col, data=train_history2)
plt.legend(labels=['train_accuracy', 'val_accuracy'])
<matplotlib.legend.Legend at 0x7b8b947ab640>
Evaluate the second CNN model on the test dataset.¶
# Generate class predictions on the held-out test data.
predict = model2.predict(test_data)
y_pred = np.argmax(predict, axis=1)
# Extract the true labels from test_data.
# NOTE(review): this assumes test_data iterates in a fixed order (shuffle=False);
# if the dataset reshuffles between passes, these labels would not line up with
# the predictions above — confirm how test_data was built.
y_test = np.concatenate([y for x, y in test_data], axis=0)
# Overall accuracy: fraction of predictions that match the true labels
# (vectorized comparison instead of a Python-level generator sum).
total_accuracy = np.mean(y_pred == y_test)
print(f'The overall accuracy of model 2 on the test dataset is {total_accuracy:.4f}.')
66/66 [==============================] - 1s 16ms/step The overall accuracy of model 2 on the test dataset is 0.2504.
# Build the confusion matrix. Passing `labels` explicitly guarantees one
# row/column per class even if some class never appears in y_test or y_pred,
# so the printed names below always line up with the right rows. (This
# replaces the original caveat that a skipped class would mislabel the
# accuracies; assumes labels are the integers 0..len(IndoorImage_dir)-1,
# which matches the argmax over the 61-way output.)
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(IndoorImage_dir)))
# Normalize the diagonal entries to get the per-class accuracies.
# errstate suppresses the warning when a class has zero test samples
# (its accuracy becomes NaN instead of raising).
with np.errstate(divide='ignore', invalid='ignore'):
    accuracies = cm.diagonal() / cm.sum(axis=1)
print("Accuracies for each class:")
for i, acc in enumerate(accuracies):
    print(f"{IndoorImage_dir[i]}: {acc:.4f}")
print(f'Total classes: {len(accuracies)}')  # Verify that all 61 classes are represented
Accuracies for each class: airport_inside: 0.5000 artstudio: 0.0000 auditorium: 0.1923 bakery: 0.1884 bar: 0.4778 bathroom: 0.2069 bedroom: 0.5773 bookstore: 0.1091 bowling: 0.2286 buffet: 0.0000 casino: 0.3165 children_room: 0.2222 church_inside: 0.0000 classroom: 0.0000 cloister: 0.0625 closet: 0.1667 clothingstore: 0.0000 computerroom: 0.2222 concert_hall: 0.0000 corridor: 0.3617 deli: 0.0000 dentaloffice: 0.0000 dining_room: 0.0238 elevator: 0.0000 fastfood_restaurant: 0.0714 florist: 0.0667 gameroom: 0.0000 garage: 0.0000 greenhouse: 0.3529 grocerystore: 0.1739 gym: 0.1000 hairsalon: 0.1707 hospitalroom: 0.0000 inside_bus: 0.1000 inside_subway: 0.4675 jewelleryshop: 0.0000 kindergarden: 0.2778 kitchen: 0.5259 mall: 0.0000 meeting_room: 0.1765 movietheater: 0.6207 museum: 0.0000 nursery: 0.0952 office: 0.1000 operating_room: 0.0385 pantry: 0.3333 poolinside: 0.6154 prisoncell: 0.0000 restaurant: 0.1475 restaurant_kitchen: 0.0000 shoeshop: 0.0000 stairscase: 0.0370 studiomusic: 0.0909 subway: 0.4940 toystore: 0.2593 trainstation: 0.0000 tv_studio: 0.1250 videostore: 0.0000 waitingroom: 0.0000 warehouse: 0.1282 winecellar: 0.3235 Total classes: 61
# Per-class precision/recall/F1. zero_division=0 makes the "no predicted
# samples" case an explicit 0.0 instead of emitting UndefinedMetricWarning
# (the reported numbers are unchanged — sklearn already fills 0.0 there).
print(classification_report(y_test, y_pred, zero_division=0))
precision recall f1-score support
0 0.29 0.50 0.37 92
1 0.00 0.00 0.00 17
2 0.19 0.19 0.19 26
3 0.65 0.19 0.29 69
4 0.20 0.48 0.28 90
5 0.67 0.21 0.32 29
6 0.25 0.58 0.35 97
7 0.30 0.11 0.16 55
8 0.40 0.23 0.29 35
9 0.00 0.00 0.00 17
10 0.45 0.32 0.37 79
11 0.33 0.22 0.27 9
12 0.00 0.00 0.00 20
13 0.00 0.00 0.00 21
14 0.50 0.06 0.11 16
15 0.31 0.17 0.22 24
16 0.00 0.00 0.00 15
17 0.11 0.22 0.15 18
18 0.00 0.00 0.00 19
19 0.22 0.36 0.28 47
20 0.00 0.00 0.00 37
21 0.00 0.00 0.00 17
22 0.09 0.02 0.04 42
23 0.00 0.00 0.00 15
24 0.06 0.07 0.06 14
25 1.00 0.07 0.12 15
26 0.00 0.00 0.00 19
27 0.00 0.00 0.00 9
28 1.00 0.35 0.52 17
29 0.36 0.17 0.24 23
30 0.15 0.10 0.12 30
31 0.11 0.17 0.13 41
32 0.00 0.00 0.00 12
33 0.33 0.10 0.15 10
34 0.32 0.47 0.38 77
35 0.00 0.00 0.00 24
36 0.36 0.28 0.31 18
37 0.23 0.53 0.32 116
38 0.00 0.00 0.00 29
39 0.17 0.18 0.17 34
40 0.38 0.62 0.47 29
41 0.00 0.00 0.00 27
42 0.67 0.10 0.17 21
43 0.05 0.10 0.06 10
44 0.17 0.04 0.06 26
45 0.30 0.33 0.32 60
46 0.55 0.62 0.58 39
47 0.00 0.00 0.00 16
48 0.07 0.15 0.10 61
49 0.00 0.00 0.00 18
50 0.00 0.00 0.00 19
51 0.25 0.04 0.06 27
52 0.10 0.09 0.10 11
53 0.35 0.49 0.41 83
54 0.48 0.26 0.34 54
55 0.00 0.00 0.00 31
56 0.18 0.12 0.15 32
57 0.00 0.00 0.00 10
58 0.00 0.00 0.00 17
59 0.23 0.13 0.16 78
60 0.18 0.32 0.23 34
accuracy 0.25 2097
macro avg 0.21 0.16 0.15 2097
weighted avg 0.25 0.25 0.22 2097
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
The inclusion of data augmentation and dropout layers in the second model fixed the overfitting seen in the first model. The loss and accuracy curves show training and validation tracking each other closely, and as soon as they began to diverge, early stopping halted the training.
Evaluation on the test data yielded an overall accuracy of 0.25 and a weighted-average f1-score of 0.22, a significant improvement over the first baseline model. Overall, the second model is acceptable given that there are 61 classes, so we do not expect accuracy as high as we would for a binary classification task.
Build a third CNN model based on pre-trained model (transfer learning).¶
# Load MobileNetV3-Large pre-trained on ImageNet as the convolutional base.
# include_top=False drops the ImageNet classification head so that a custom
# 61-class classifier can be attached on top.
base_model_kwargs = dict(
    input_shape=(image_height, image_width, 3),
    include_top=False,     # remove the fully-connected head; we customize it
    weights='imagenet',    # start from ImageNet pre-training
)
MobileNetV3Large_model = tf.keras.applications.MobileNetV3Large(**base_model_kwargs)
# Uncomment to inspect the full layer listing
# MobileNetV3Large_model.summary()
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not 224. Weights for input shape (224, 224) will be loaded as the default.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_large_224_1.0_float_no_top_v2.h5 12683000/12683000 [==============================] - 0s 0us/step
# Freeze the convolutional base: only the new classification head is trained.
MobileNetV3Large_model.trainable = False

# Input preprocessing that matches the pre-trained MobileNetV3 weights.
preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input

# Assemble the model with the Keras functional API:
# augment -> preprocess -> frozen base -> global pooling -> dropout -> logits.
# (The original also built an unused Flatten layer here; removed.)
inputs = tf.keras.Input(shape=(image_height, image_width, 3))
x = data_aug(inputs)
x = preprocess_input(x)
# training=False keeps BatchNorm layers of the base in inference mode
# even while the head is being fit.
x = MobileNetV3Large_model(x, training=False)
# Global average pooling rather than Flatten: far fewer parameters, which
# suits the larger dataset and the base model's wide feature maps.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# Dropout for regularization; the Dense layer emits one logit per target class.
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(61)(x)

# With the functional API, the final step ties inputs and outputs together.
model3 = tf.keras.Model(inputs, outputs)

# from_logits=True because the Dense head has no softmax activation.
model3.compile(optimizer='adam',
               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])
model3.summary()
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 256, 256, 3)] 0
sequential_1 (Sequential) (None, 256, 256, 3) 0
MobilenetV3large (Function (None, 8, 8, 960) 2996352
al)
global_average_pooling2d ( (None, 960) 0
GlobalAveragePooling2D)
dropout_2 (Dropout) (None, 960) 0
dense_4 (Dense) (None, 61) 58621
=================================================================
Total params: 3054973 (11.65 MB)
Trainable params: 58621 (228.99 KB)
Non-trainable params: 2996352 (11.43 MB)
_________________________________________________________________
%%time
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience= 7)
epochs= 100
history3 = model3.fit(
train_data,
validation_data=val_data,
epochs=epochs,
callbacks=[callback], verbose = 1
)
Epoch 1/100 132/132 [==============================] - 16s 85ms/step - loss: 3.6506 - accuracy: 0.1484 - val_loss: 2.7626 - val_accuracy: 0.3021 Epoch 2/100 132/132 [==============================] - 10s 77ms/step - loss: 2.5468 - accuracy: 0.3293 - val_loss: 2.0341 - val_accuracy: 0.4474 Epoch 3/100 132/132 [==============================] - 10s 76ms/step - loss: 2.1783 - accuracy: 0.4150 - val_loss: 1.8733 - val_accuracy: 0.4801 Epoch 4/100 132/132 [==============================] - 10s 74ms/step - loss: 1.9621 - accuracy: 0.4543 - val_loss: 1.7506 - val_accuracy: 0.5161 Epoch 5/100 132/132 [==============================] - 10s 76ms/step - loss: 1.8438 - accuracy: 0.4912 - val_loss: 1.6815 - val_accuracy: 0.5407 Epoch 6/100 132/132 [==============================] - 10s 78ms/step - loss: 1.7187 - accuracy: 0.5182 - val_loss: 1.5545 - val_accuracy: 0.5795 Epoch 7/100 132/132 [==============================] - 12s 92ms/step - loss: 1.6389 - accuracy: 0.5336 - val_loss: 1.5500 - val_accuracy: 0.5720 Epoch 8/100 132/132 [==============================] - 12s 92ms/step - loss: 1.5828 - accuracy: 0.5471 - val_loss: 1.5760 - val_accuracy: 0.5672 Epoch 9/100 132/132 [==============================] - 10s 77ms/step - loss: 1.5077 - accuracy: 0.5760 - val_loss: 1.4728 - val_accuracy: 0.5942 Epoch 10/100 132/132 [==============================] - 10s 77ms/step - loss: 1.4706 - accuracy: 0.5805 - val_loss: 1.4555 - val_accuracy: 0.5971 Epoch 11/100 132/132 [==============================] - 12s 92ms/step - loss: 1.4052 - accuracy: 0.6004 - val_loss: 1.4572 - val_accuracy: 0.6108 Epoch 12/100 132/132 [==============================] - 12s 92ms/step - loss: 1.3955 - accuracy: 0.5919 - val_loss: 1.4271 - val_accuracy: 0.6061 Epoch 13/100 132/132 [==============================] - 12s 92ms/step - loss: 1.3501 - accuracy: 0.6122 - val_loss: 1.4579 - val_accuracy: 0.6056 Epoch 14/100 132/132 [==============================] - 10s 77ms/step - loss: 1.3409 - accuracy: 0.6158 - 
val_loss: 1.4243 - val_accuracy: 0.6174 Epoch 15/100 132/132 [==============================] - 10s 77ms/step - loss: 1.2868 - accuracy: 0.6214 - val_loss: 1.4344 - val_accuracy: 0.6155 Epoch 16/100 132/132 [==============================] - 10s 75ms/step - loss: 1.2949 - accuracy: 0.6257 - val_loss: 1.4380 - val_accuracy: 0.6198 Epoch 17/100 132/132 [==============================] - 12s 93ms/step - loss: 1.2672 - accuracy: 0.6281 - val_loss: 1.4327 - val_accuracy: 0.6084 Epoch 18/100 132/132 [==============================] - 10s 75ms/step - loss: 1.2403 - accuracy: 0.6409 - val_loss: 1.4619 - val_accuracy: 0.6113 Epoch 19/100 132/132 [==============================] - 10s 76ms/step - loss: 1.2191 - accuracy: 0.6399 - val_loss: 1.5372 - val_accuracy: 0.6042 Epoch 20/100 132/132 [==============================] - 10s 77ms/step - loss: 1.1883 - accuracy: 0.6527 - val_loss: 1.4919 - val_accuracy: 0.6174 Epoch 21/100 132/132 [==============================] - 10s 76ms/step - loss: 1.1911 - accuracy: 0.6465 - val_loss: 1.5443 - val_accuracy: 0.6042 Epoch 22/100 132/132 [==============================] - 12s 92ms/step - loss: 1.1434 - accuracy: 0.6579 - val_loss: 1.4749 - val_accuracy: 0.6151 Epoch 23/100 132/132 [==============================] - 12s 92ms/step - loss: 1.1607 - accuracy: 0.6487 - val_loss: 1.4688 - val_accuracy: 0.6193 CPU times: user 1min 35s, sys: 9.83 s, total: 1min 45s Wall time: 4min 40s
Evaluate the third pre-trained CNN model on the validation dataset.¶
# Convert the Keras History object into a DataFrame and add a 1-based epoch column.
train_history3 = pd.DataFrame(history3.history)
train_history3['epoch'] = [e + 1 for e in history3.epoch]
# Overlay the training and validation loss curves on the same axes.
for loss_col in ('loss', 'val_loss'):
    sns.lineplot(x='epoch', y=loss_col, data=train_history3)
plt.legend(labels=['train_loss', 'val_loss'])
<matplotlib.legend.Legend at 0x7b8b9541ee00>
# Overlay training vs. validation accuracy per epoch to check for overfitting.
for acc_col in ('accuracy', 'val_accuracy'):
    sns.lineplot(x='epoch', y=acc_col, data=train_history3)
plt.legend(labels=['train_accuracy', 'val_accuracy'])
<matplotlib.legend.Legend at 0x7b8b88665240>
Evaluate the third pre-trained CNN model on the test dataset.¶
# Generate class predictions on the held-out test data.
predict = model3.predict(test_data)
y_pred = np.argmax(predict, axis=1)
# Extract the true labels from test_data.
# NOTE(review): this assumes test_data iterates in a fixed order (shuffle=False);
# if the dataset reshuffles between passes, these labels would not line up with
# the predictions above — confirm how test_data was built.
y_test = np.concatenate([y for x, y in test_data], axis=0)
# Overall accuracy: fraction of predictions that match the true labels
# (vectorized comparison instead of a Python-level generator sum).
total_accuracy = np.mean(y_pred == y_test)
print(f'The overall accuracy of model 3 on the test dataset is {total_accuracy:.4f}.')
66/66 [==============================] - 4s 47ms/step The overall accuracy of model 3 on the test dataset is 0.6390.
# Build the confusion matrix. Passing `labels` explicitly guarantees one
# row/column per class even if some class never appears in y_test or y_pred,
# so the printed names below always line up with the right rows. (This
# replaces the original caveat that a skipped class would mislabel the
# accuracies; assumes labels are the integers 0..len(IndoorImage_dir)-1,
# which matches the argmax over the 61-way output.)
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(IndoorImage_dir)))
# Normalize the diagonal entries to get the per-class accuracies.
# errstate suppresses the warning when a class has zero test samples
# (its accuracy becomes NaN instead of raising).
with np.errstate(divide='ignore', invalid='ignore'):
    accuracies = cm.diagonal() / cm.sum(axis=1)
print("Accuracies for each class:")
for i, acc in enumerate(accuracies):
    print(f"{IndoorImage_dir[i]}: {acc:.4f}")
print(f'Total classes: {len(accuracies)}')  # Verify that all 61 classes are represented
Accuracies for each class: airport_inside: 0.5543 artstudio: 0.4706 auditorium: 0.5000 bakery: 0.7681 bar: 0.6778 bathroom: 0.6897 bedroom: 0.8969 bookstore: 0.7091 bowling: 0.8857 buffet: 0.8824 casino: 0.6835 children_room: 0.1111 church_inside: 0.6500 classroom: 0.5714 cloister: 1.0000 closet: 0.8333 clothingstore: 0.5333 computerroom: 0.6111 concert_hall: 0.0000 corridor: 0.7447 deli: 0.3514 dentaloffice: 0.6471 dining_room: 0.0714 elevator: 0.4667 fastfood_restaurant: 0.1429 florist: 0.8000 gameroom: 0.7368 garage: 0.5556 greenhouse: 1.0000 grocerystore: 0.6522 gym: 0.8333 hairsalon: 0.2195 hospitalroom: 0.2500 inside_bus: 0.6000 inside_subway: 0.9481 jewelleryshop: 0.0417 kindergarden: 0.5000 kitchen: 0.8793 mall: 0.5172 meeting_room: 0.5294 movietheater: 0.6207 museum: 0.0000 nursery: 0.7619 office: 0.7000 operating_room: 0.2692 pantry: 0.8500 poolinside: 0.7179 prisoncell: 0.3750 restaurant: 0.6066 restaurant_kitchen: 0.7222 shoeshop: 0.1579 stairscase: 0.7407 studiomusic: 0.9091 subway: 0.6867 toystore: 0.7037 trainstation: 0.4839 tv_studio: 0.2812 videostore: 0.4000 waitingroom: 0.3529 warehouse: 0.8462 winecellar: 0.6176 Total classes: 61
# Per-class precision/recall/F1. zero_division=0 makes the "no predicted
# samples" case an explicit 0.0 instead of emitting UndefinedMetricWarning
# (the reported numbers are unchanged — sklearn already fills 0.0 there).
print(classification_report(y_test, y_pred, zero_division=0))
precision recall f1-score support
0 0.67 0.55 0.61 92
1 1.00 0.47 0.64 17
2 0.36 0.50 0.42 26
3 0.67 0.77 0.72 69
4 0.59 0.68 0.63 90
5 0.80 0.69 0.74 29
6 0.64 0.90 0.75 97
7 0.60 0.71 0.65 55
8 0.97 0.89 0.93 35
9 0.71 0.88 0.79 17
10 0.93 0.68 0.79 79
11 0.12 0.11 0.12 9
12 0.93 0.65 0.76 20
13 0.63 0.57 0.60 21
14 0.76 1.00 0.86 16
15 0.80 0.83 0.82 24
16 0.89 0.53 0.67 15
17 0.52 0.61 0.56 18
18 0.00 0.00 0.00 19
19 0.51 0.74 0.60 47
20 0.45 0.35 0.39 37
21 0.55 0.65 0.59 17
22 0.75 0.07 0.13 42
23 0.50 0.47 0.48 15
24 0.18 0.14 0.16 14
25 0.86 0.80 0.83 15
26 0.82 0.74 0.78 19
27 0.18 0.56 0.27 9
28 1.00 1.00 1.00 17
29 0.56 0.65 0.60 23
30 0.57 0.83 0.68 30
31 0.69 0.22 0.33 41
32 0.20 0.25 0.22 12
33 1.00 0.60 0.75 10
34 0.62 0.95 0.75 77
35 0.33 0.04 0.07 24
36 0.82 0.50 0.62 18
37 0.71 0.88 0.78 116
38 0.65 0.52 0.58 29
39 0.53 0.53 0.53 34
40 0.72 0.62 0.67 29
41 0.00 0.00 0.00 27
42 0.94 0.76 0.84 21
43 0.12 0.70 0.21 10
44 0.70 0.27 0.39 26
45 0.74 0.85 0.79 60
46 0.90 0.72 0.80 39
47 1.00 0.38 0.55 16
48 0.51 0.61 0.56 61
49 0.65 0.72 0.68 18
50 0.75 0.16 0.26 19
51 0.71 0.74 0.73 27
52 0.33 0.91 0.49 11
53 0.68 0.69 0.68 83
54 0.68 0.70 0.69 54
55 0.65 0.48 0.56 31
56 0.64 0.28 0.39 32
57 0.57 0.40 0.47 10
58 0.67 0.35 0.46 17
59 0.77 0.85 0.80 78
60 0.70 0.62 0.66 34
accuracy 0.64 2097
macro avg 0.63 0.58 0.57 2097
weighted avg 0.66 0.64 0.62 2097
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
Recommend the best model¶
It is evident that the pre-trained model 3 is by far the best model. Note there are no signs of overfitting as the training/validation curves generally move 1:1 with each other.
Evaluating model 3 on the test data yielded an overall accuracy of 64% and weighted f1-score of 0.62, by far the highest we have seen from any model. Model 3 was also the quickest to train. All of these factors demonstrate the usefulness of a pre-trained model and transfer learning.
%%shell
# Export this notebook to a standalone HTML report alongside it in /content.
jupyter nbconvert --to html /content/FinalProject_WangKevin_TF.ipynb
[NbConvertApp] Converting notebook /content/FinalProject_WangKevin_TF.ipynb to html [NbConvertApp] Writing 23886533 bytes to /content/FinalProject_WangKevin_TF.html